Dependencies

Install the following libraries before running the TDA pipeline.

#install.packages(c("TDA", "Matrix"))

Load libraries

library("Matrix") #package for sparse matrices
library("TDA")

Set working directory

working_dir_path <- setwd("~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline")

Load necessary functions

source("utilities.R")

Create a folder with a subfolder for each data group for storing saved tda computations

You only need to run this block once

dir.create("saved_computations", showWarnings = FALSE)
saved_computations <- concat_path(working_dir_path, "saved_computations")

#choose names for two data groups
groups <- list("group_0", "group_25")

for (i in 1:length(groups)){
  dir.create(concat_path(saved_computations,groups[i]),  showWarnings = FALSE)
}

Create a single csv file for storing necessary parameters (max birth and max death) obtained from all data groups

You only need to run this block once

max_values <- data.frame(matrix(ncol = 2, nrow = 0))
colnames(max_values) <- c("maxbirth", "maxdeath")  

write.csv(max_values, concat_path(saved_computations,"max-values.csv"), row.names=FALSE)

Choose cell types

#specify location path of the file with the cell type codes and their colors
cell_types_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Discretization/cell_colors.csv"

#read all cell type codes from your designated file
cell_types_file <- read.csv(cell_types_path)
all_cell_types <- cell_types_file$Code

# indicate cell types for only including specific cells 
cell_types <- all_cell_types[2]   # this can either be a single value or a vector

index <- NaN    # default index for cell types is the last column

sprintf("Cell types used: %s", paste(cell_types, collapse=", "))
## [1] "Cell types used: 1"

I. Choose a data group and its location

#choose a group folder where to save computations
group <- "group_0"

# specify location path of the csv files of that group
csv_dir_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Discretized-images/0"

II. Compute persistence diagrams and save them

We use Delaunay complex filtration to compute persistence diagrams

compute_PDs(csv_dir_path, group, saved_computations, cell_types, index)
## [1] "Processing file 0_dox_1.csv"
## [1] "Processing file 0_dox_10.csv"
## [1] "Processing file 0_dox_11.csv"
## [1] "Processing file 0_dox_12.csv"
## [1] "Processing file 0_dox_13.csv"
## [1] "Processing file 0_dox_14.csv"
## [1] "Processing file 0_dox_15.csv"
## [1] "Processing file 0_dox_16.csv"
## [1] "Processing file 0_dox_2.csv"
## [1] "Processing file 0_dox_3.csv"
## [1] "Processing file 0_dox_4.csv"
## [1] "Processing file 0_dox_5.csv"
## [1] "Processing file 0_dox_6.csv"
## [1] "Processing file 0_dox_7.csv"
## [1] "Processing file 0_dox_8.csv"
## [1] "Processing file 0_dox_9.csv"
## [1] "Persistence diagrams are saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_PDs.RData"

III. Determine the max birth and max death among all data files

This is needed to generate plots in both data groups with the same scale

#If you want to generate plots in both data groups with the same scale, then before running this code block, compute persistence diagrams for all data groups (i.e choose another data group and specify its location in I-block, and compute its persistence diagrams in II-block) 

max_values <- read.csv(concat_path(saved_computations, "max-values.csv"))

max_birth <- max(max_values$maxbirth)
max_death <- max(max_values$maxdeath)

Plot persistence diagrams

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_PDs(group, max_birth, max_death, saved_computations, save_plot)
## [1] "Persistence diagrams plots are saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"

Plot representative cycles that persist (live) over a certain threshold

#choose a persistence threshold
persist_threshold <- 20 

save_plot <- TRUE #set FALSE if you don't want to save plots

#plot representative cycles with persistence above the chosen threshold
plot_representative_cycles(csv_dir_path, cell_types, persist_threshold, group, saved_computations, save_plot)
## [1] "Processing file 0_dox_1.csv"
## # Generated complex of size: 231

## [1] "Processing file 0_dox_10.csv"
## # Generated complex of size: 1089

## [1] "Processing file 0_dox_11.csv"
## # Generated complex of size: 1925

## [1] "Processing file 0_dox_12.csv"
## # Generated complex of size: 2901

## [1] "Processing file 0_dox_13.csv"
## # Generated complex of size: 499

## [1] "Processing file 0_dox_14.csv"
## # Generated complex of size: 1263

## [1] "Processing file 0_dox_15.csv"
## # Generated complex of size: 1319

## [1] "Processing file 0_dox_16.csv"
## # Generated complex of size: 2281

## [1] "Processing file 0_dox_2.csv"
## # Generated complex of size: 669

## [1] "Processing file 0_dox_3.csv"
## # Generated complex of size: 1819

## [1] "Processing file 0_dox_4.csv"
## # Generated complex of size: 1771

## [1] "Processing file 0_dox_5.csv"
## # Generated complex of size: 565

## [1] "Processing file 0_dox_6.csv"
## # Generated complex of size: 1387

## [1] "Processing file 0_dox_7.csv"
## # Generated complex of size: 3089

## [1] "Processing file 0_dox_8.csv"
## # Generated complex of size: 2703

## [1] "Processing file 0_dox_9.csv"
## # Generated complex of size: 645

## [1] "Representative cycle are saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"

IV. Compute persistence landscapes and save them

You need to have persistence diagrams computed first

#choose a discretization step
discr_step <- 0.3

compute_PLs(group, max_birth, max_death, discr_step, saved_computations)
## [1] "Persistence landscapes are saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_PLs.RData"

Plot persistence landscapes

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_PLs(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Persistence landscape plots are saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0"

V. Compute the average persistence landscape and save it

You need to have persistence landscapes computed first

compute_avgPL(group, max_birth, max_death, discr_step, saved_computations)
## [1] "Average persistence landscape is saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_avgPL.RData"

Plot the average persistence landscape

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_avgPL(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Average persistence landscape plot is saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"

At this point, you need to have computed persistence diagrams, persistence landscapes, and average persistence landscapes for BOTH groups


VI. Plot the difference of average persistence landscapes of two data groups

You need to have computed average persistence landscapes for both groups

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_avgPLs_difference(groups, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Plot of the difference of two average persistence landscapes is saved in /Users/Iryna/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations"

VII. Perform a permutation test on persistence landscapes from two data groups

You need to have computed persistence landscapes for both groups

#number of permutations
num_repeats <- 10000

permutation_test_for_PLs(groups, saved_computations, num_repeats)
## [1] "p-value 0.0005:"